import numpy as np import sys import matplotlib.pyplot as plt # type: ignore[import-untyped] import seaborn as sns # type: ignore[import-untyped] # --- CONFIGURATION & PRIORS --- # Accuracy levels 90%, 75%, 60%, 50% with same odds as before PRIOR_A = {0.9: 0.30, 0.75: 0.30, 0.6: 0.20, 0.5: 0.20} PRIOR_B = {0.9: 0.20, 0.75: 0.20, 0.6: 0.30, 0.5: 0.30} HORIZON = 20 SIMULATION_RUNS = 5000 sys.setrecursionlimit(2000) # --- 1. THE MATH ENGINE (Dynamic Programming) --- def get_expected_accuracy(prior): """Calculates expected value of an advisor given their current belief state.""" return sum(acc * prob for acc, prob in prior.items()) def update_prior(prior, success): """Bayesian update of the prior.""" new_prior = {} total_prob = 0.0 for acc, prob in prior.items(): likelihood = acc if success else (1 - acc) new_prior[acc] = prob * likelihood total_prob += new_prior[acc] for acc in new_prior: new_prior[acc] /= total_prob return new_prior # Memoization Dictionary memo = {} def solve_dp(sA, fA, sB, fB, priorA, priorB): """ Returns the Q-values (Expected Future Reward) for choosing A and B. State: (wins_A, losses_A, wins_B, losses_B) """ rounds_played = sA + fA + sB + fB if rounds_played >= HORIZON: return 0.0, 0.0 state_key = (sA, fA, sB, fB) if state_key in memo: return memo[state_key] # --- CALC VALUE FOR CHOOSING A --- exp_A = get_expected_accuracy(priorA) # Future if A succeeds pA_succ = update_prior(priorA, True) future_A_succ = max(solve_dp(sA + 1, fA, sB, fB, pA_succ, priorB)) # Future if A fails pA_fail = update_prior(priorA, False) future_A_fail = max(solve_dp(sA, fA + 1, sB, fB, pA_fail, priorB)) q_A = exp_A * (1 + future_A_succ) + (1 - exp_A) * (0 + future_A_fail) # --- CALC VALUE FOR CHOOSING B --- exp_B = get_expected_accuracy(priorB) # Future if B succeeds pB_succ = update_prior(priorB, True) future_B_succ = max(solve_dp(sA, fA, sB + 1, fB, priorA, pB_succ)) # Future if B fails pB_fail = update_prior(priorB, False) future_B_fail = max(solve_dp(sA, fA, sB, fB + 1, priorA, pB_fail)) q_B = exp_B * (1 + future_B_succ) + (1 - exp_B) * (0 + future_B_fail) memo[state_key] = (q_A, q_B) return q_A, q_B # --- 2. GENERATE DATA FOR PLOTS --- def generate_heatmap_data(): print("Generating Heatmap Data (Patience Frontier)...") # We look at the decision boundary assuming B is still "Fresh" (0 wins, 0 losses) grid_size = 14 diff_grid = np.zeros((grid_size, grid_size)) for w in range(grid_size): for l in range(grid_size): if w + l >= HORIZON: diff_grid[l, w] = np.nan continue # Reconstruct A's prior for this specific grid cell curr_pA = PRIOR_A.copy() for _ in range(w): curr_pA = update_prior(curr_pA, True) for _ in range(l): curr_pA = update_prior(curr_pA, False) # Get Q-values assuming B is fresh (0,0) qA, qB = solve_dp(w, l, 0, 0, curr_pA, PRIOR_B) # Store difference: Positive = Stay A, Negative = Switch B diff_grid[l, w] = qA - qB return diff_grid def run_simulation(): print(f"Running Monte Carlo Simulation ({SIMULATION_RUNS} runs)...") scores = [] for _ in range(SIMULATION_RUNS): # 1. Determine True Accuracies for this specific world true_A = np.random.choice(list(PRIOR_A.keys()), p=list(PRIOR_A.values())) true_B = np.random.choice(list(PRIOR_B.keys()), p=list(PRIOR_B.values())) # 2. Play the game sA, fA, sB, fB = 0, 0, 0, 0 pA, pB = PRIOR_A.copy(), PRIOR_B.copy() score = 0 for _ in range(HORIZON): qA, qB = solve_dp(sA, fA, sB, fB, pA, pB) # Greedy choice on Q-values choice = 'A' if qA >= qB else 'B' if choice == 'A': is_correct = np.random.rand() < true_A if is_correct: score += 1 sA += 1 pA = update_prior(pA, True) else: fA += 1 pA = update_prior(pA, False) else: is_correct = np.random.rand() < true_B if is_correct: score += 1 sB += 1 pB = update_prior(pB, True) else: fB += 1 pB = update_prior(pB, False) scores.append(score) return scores # --- 3. PLOTTING FUNCTION --- def create_thesis_plots(diff_grid, scores): print("Generating Plots...") sns.set_context("paper", font_scale=1.5) sns.set_style("whitegrid") # --- FIGURE 1: HEATMAP --- fig1, ax1 = plt.subplots(figsize=(8, 6)) # Custom Colormap: Red (Switch) to Blue (Stay) # Using a diverging palette centered at 0 cmap = sns.diverging_palette(10, 240, as_cmap=True, center="light") sns.heatmap(diff_grid, ax=ax1, cmap=cmap, center=0, annot=True, fmt=".1f", annot_kws={"size": 7}, cbar_kws={'label': 'Expected Advantage of A vs B'}, square=True, mask=np.isnan(diff_grid)) ax1.set_title("Optimal 'Patience Frontier' (Start of Block)", fontweight='bold') ax1.set_xlabel("Wins by Advisor A") ax1.set_ylabel("Losses by Advisor A") ax1.invert_yaxis() # Put 0 losses at bottom # Add Text Annotations for clarity ax1.text(0.5, 0.5, "START", color='black', ha='center', va='center', weight='bold', fontsize=10) ax1.text(0.5, 1.5, "SWITCH", color='darkred', ha='center', va='center', weight='bold', fontsize=9) plt.tight_layout() plt.savefig("Figure1_PatienceFrontier.png", dpi=300) print("Saved Figure1_PatienceFrontier.png") # --- FIGURE 2: HISTOGRAM --- fig2, ax2 = plt.subplots(figsize=(8, 6)) mean_score = np.mean(scores) sns.histplot(scores, bins=np.arange(4.5, 21.5, 1), kde=False, color="#2c7fb8", alpha=0.8, stat='percent') # Add Mean Line ax2.axvline(mean_score, color='red', linestyle='--', linewidth=2, label=f'Mean Score: {mean_score:.2f}') ax2.set_title(f"Performance Distribution (Optimal Agent)", fontweight='bold') ax2.set_xlabel("Total Correct Answers (out of 20)") ax2.set_ylabel("Frequency (%)") ax2.set_xticks(range(5, 21)) ax2.legend() plt.tight_layout() plt.savefig("Figure2_ScoreDistribution.png", dpi=300) print("Saved Figure2_ScoreDistribution.png") if __name__ == "__main__": # Run from the same environment where matplotlib/seaborn are installed, e.g.: # cd advisor_study_project/advisor_experiment && python Gittins.py # If you get ModuleNotFoundError for matplotlib/seaborn, install for that Python: # python -m pip install matplotlib seaborn numpy # 1. Populate Memoization Table (by running start state) print("Solving DP...") solve_dp(0, 0, 0, 0, PRIOR_A.copy(), PRIOR_B.copy()) # 2. Get Data heatmap_data = generate_heatmap_data() simulation_scores = run_simulation() # 3. Plot (skipped if matplotlib/seaborn not installed) create_thesis_plots(heatmap_data, simulation_scores)